{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 训练和验证数据集-数据预处理"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "导入模块"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "读取数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "dpath = '../data/'\n",
    "train = pd.read_csv(dpath+'Otto_train.csv')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "数据探索"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(61878, 95)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>feat_1</th>\n",
       "      <th>feat_2</th>\n",
       "      <th>feat_3</th>\n",
       "      <th>feat_4</th>\n",
       "      <th>feat_5</th>\n",
       "      <th>feat_6</th>\n",
       "      <th>feat_7</th>\n",
       "      <th>feat_8</th>\n",
       "      <th>feat_9</th>\n",
       "      <th>...</th>\n",
       "      <th>feat_85</th>\n",
       "      <th>feat_86</th>\n",
       "      <th>feat_87</th>\n",
       "      <th>feat_88</th>\n",
       "      <th>feat_89</th>\n",
       "      <th>feat_90</th>\n",
       "      <th>feat_91</th>\n",
       "      <th>feat_92</th>\n",
       "      <th>feat_93</th>\n",
       "      <th>target</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Class_1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Class_1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Class_1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Class_1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>Class_1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 95 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   id  feat_1  feat_2  feat_3  feat_4  feat_5  feat_6  feat_7  feat_8  feat_9  \\\n",
       "0   1       1       0       0       0       0       0       0       0       0   \n",
       "1   2       0       0       0       0       0       0       0       1       0   \n",
       "2   3       0       0       0       0       0       0       0       1       0   \n",
       "3   4       1       0       0       1       6       1       5       0       0   \n",
       "4   5       0       0       0       0       0       0       0       0       0   \n",
       "\n",
       "    ...     feat_85  feat_86  feat_87  feat_88  feat_89  feat_90  feat_91  \\\n",
       "0   ...           1        0        0        0        0        0        0   \n",
       "1   ...           0        0        0        0        0        0        0   \n",
       "2   ...           0        0        0        0        0        0        0   \n",
       "3   ...           0        1        2        0        0        0        0   \n",
       "4   ...           1        0        0        0        0        1        0   \n",
       "\n",
       "   feat_92  feat_93   target  \n",
       "0        0        0  Class_1  \n",
       "1        0        0  Class_1  \n",
       "2        0        0  Class_1  \n",
       "3        0        0  Class_1  \n",
       "4        0        0  Class_1  \n",
       "\n",
       "[5 rows x 95 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>feat_1</th>\n",
       "      <th>feat_2</th>\n",
       "      <th>feat_3</th>\n",
       "      <th>feat_4</th>\n",
       "      <th>feat_5</th>\n",
       "      <th>feat_6</th>\n",
       "      <th>feat_7</th>\n",
       "      <th>feat_8</th>\n",
       "      <th>feat_9</th>\n",
       "      <th>...</th>\n",
       "      <th>feat_84</th>\n",
       "      <th>feat_85</th>\n",
       "      <th>feat_86</th>\n",
       "      <th>feat_87</th>\n",
       "      <th>feat_88</th>\n",
       "      <th>feat_89</th>\n",
       "      <th>feat_90</th>\n",
       "      <th>feat_91</th>\n",
       "      <th>feat_92</th>\n",
       "      <th>feat_93</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>61878.000000</td>\n",
       "      <td>61878.00000</td>\n",
       "      <td>61878.000000</td>\n",
       "      <td>61878.000000</td>\n",
       "      <td>61878.000000</td>\n",
       "      <td>61878.000000</td>\n",
       "      <td>61878.000000</td>\n",
       "      <td>61878.000000</td>\n",
       "      <td>61878.000000</td>\n",
       "      <td>61878.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>61878.000000</td>\n",
       "      <td>61878.000000</td>\n",
       "      <td>61878.000000</td>\n",
       "      <td>61878.000000</td>\n",
       "      <td>61878.000000</td>\n",
       "      <td>61878.000000</td>\n",
       "      <td>61878.000000</td>\n",
       "      <td>61878.000000</td>\n",
       "      <td>61878.000000</td>\n",
       "      <td>61878.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>30939.500000</td>\n",
       "      <td>0.38668</td>\n",
       "      <td>0.263066</td>\n",
       "      <td>0.901467</td>\n",
       "      <td>0.779081</td>\n",
       "      <td>0.071043</td>\n",
       "      <td>0.025696</td>\n",
       "      <td>0.193704</td>\n",
       "      <td>0.662433</td>\n",
       "      <td>1.011296</td>\n",
       "      <td>...</td>\n",
       "      <td>0.070752</td>\n",
       "      <td>0.532306</td>\n",
       "      <td>1.128576</td>\n",
       "      <td>0.393549</td>\n",
       "      <td>0.874915</td>\n",
       "      <td>0.457772</td>\n",
       "      <td>0.812421</td>\n",
       "      <td>0.264941</td>\n",
       "      <td>0.380119</td>\n",
       "      <td>0.126135</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>17862.784315</td>\n",
       "      <td>1.52533</td>\n",
       "      <td>1.252073</td>\n",
       "      <td>2.934818</td>\n",
       "      <td>2.788005</td>\n",
       "      <td>0.438902</td>\n",
       "      <td>0.215333</td>\n",
       "      <td>1.030102</td>\n",
       "      <td>2.255770</td>\n",
       "      <td>3.474822</td>\n",
       "      <td>...</td>\n",
       "      <td>1.151460</td>\n",
       "      <td>1.900438</td>\n",
       "      <td>2.681554</td>\n",
       "      <td>1.575455</td>\n",
       "      <td>2.115466</td>\n",
       "      <td>1.527385</td>\n",
       "      <td>4.597804</td>\n",
       "      <td>2.045646</td>\n",
       "      <td>0.982385</td>\n",
       "      <td>1.201720</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>15470.250000</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>30939.500000</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>46408.750000</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>61878.000000</td>\n",
       "      <td>61.00000</td>\n",
       "      <td>51.000000</td>\n",
       "      <td>64.000000</td>\n",
       "      <td>70.000000</td>\n",
       "      <td>19.000000</td>\n",
       "      <td>10.000000</td>\n",
       "      <td>38.000000</td>\n",
       "      <td>76.000000</td>\n",
       "      <td>43.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>76.000000</td>\n",
       "      <td>55.000000</td>\n",
       "      <td>65.000000</td>\n",
       "      <td>67.000000</td>\n",
       "      <td>30.000000</td>\n",
       "      <td>61.000000</td>\n",
       "      <td>130.000000</td>\n",
       "      <td>52.000000</td>\n",
       "      <td>19.000000</td>\n",
       "      <td>87.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>8 rows × 94 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                 id       feat_1        feat_2        feat_3        feat_4  \\\n",
       "count  61878.000000  61878.00000  61878.000000  61878.000000  61878.000000   \n",
       "mean   30939.500000      0.38668      0.263066      0.901467      0.779081   \n",
       "std    17862.784315      1.52533      1.252073      2.934818      2.788005   \n",
       "min        1.000000      0.00000      0.000000      0.000000      0.000000   \n",
       "25%    15470.250000      0.00000      0.000000      0.000000      0.000000   \n",
       "50%    30939.500000      0.00000      0.000000      0.000000      0.000000   \n",
       "75%    46408.750000      0.00000      0.000000      0.000000      0.000000   \n",
       "max    61878.000000     61.00000     51.000000     64.000000     70.000000   \n",
       "\n",
       "             feat_5        feat_6        feat_7        feat_8        feat_9  \\\n",
       "count  61878.000000  61878.000000  61878.000000  61878.000000  61878.000000   \n",
       "mean       0.071043      0.025696      0.193704      0.662433      1.011296   \n",
       "std        0.438902      0.215333      1.030102      2.255770      3.474822   \n",
       "min        0.000000      0.000000      0.000000      0.000000      0.000000   \n",
       "25%        0.000000      0.000000      0.000000      0.000000      0.000000   \n",
       "50%        0.000000      0.000000      0.000000      0.000000      0.000000   \n",
       "75%        0.000000      0.000000      0.000000      1.000000      0.000000   \n",
       "max       19.000000     10.000000     38.000000     76.000000     43.000000   \n",
       "\n",
       "           ...            feat_84       feat_85       feat_86       feat_87  \\\n",
       "count      ...       61878.000000  61878.000000  61878.000000  61878.000000   \n",
       "mean       ...           0.070752      0.532306      1.128576      0.393549   \n",
       "std        ...           1.151460      1.900438      2.681554      1.575455   \n",
       "min        ...           0.000000      0.000000      0.000000      0.000000   \n",
       "25%        ...           0.000000      0.000000      0.000000      0.000000   \n",
       "50%        ...           0.000000      0.000000      0.000000      0.000000   \n",
       "75%        ...           0.000000      0.000000      1.000000      0.000000   \n",
       "max        ...          76.000000     55.000000     65.000000     67.000000   \n",
       "\n",
       "            feat_88       feat_89       feat_90       feat_91       feat_92  \\\n",
       "count  61878.000000  61878.000000  61878.000000  61878.000000  61878.000000   \n",
       "mean       0.874915      0.457772      0.812421      0.264941      0.380119   \n",
       "std        2.115466      1.527385      4.597804      2.045646      0.982385   \n",
       "min        0.000000      0.000000      0.000000      0.000000      0.000000   \n",
       "25%        0.000000      0.000000      0.000000      0.000000      0.000000   \n",
       "50%        0.000000      0.000000      0.000000      0.000000      0.000000   \n",
       "75%        1.000000      0.000000      0.000000      0.000000      0.000000   \n",
       "max       30.000000     61.000000    130.000000     52.000000     19.000000   \n",
       "\n",
       "            feat_93  \n",
       "count  61878.000000  \n",
       "mean       0.126135  \n",
       "std        1.201720  \n",
       "min        0.000000  \n",
       "25%        0.000000  \n",
       "50%        0.000000  \n",
       "75%        0.000000  \n",
       "max       87.000000  \n",
       "\n",
       "[8 rows x 94 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAELCAYAAAARNxsIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAHyxJREFUeJzt3X28VmWd7/HPFxDTysDAIh7aOLPtRGZFOzM95lMp2AxYR3vhNEHqiVOjZp0e0LEJR6OyJycrbShJrY5IlIlFIZlo04SCTyCoww5NtpBgoFIdNfQ3f6zrjsXu3nsvNuvea9/u7/v1ul97rd+61rp+NyK/vda11roUEZiZmZVhUNUJmJnZ84eLipmZlcZFxczMSuOiYmZmpXFRMTOz0riomJlZaVxUzMysNC4qZmZWGhcVMzMrzZCqE+hrI0aMiJaWlqrTMDNrKnfcccdjETGyp3YDrqi0tLSwcuXKqtMwM2sqkn5bpJ0vf5mZWWlcVMzMrDQuKmZmVhoXFTMzK42LipmZlcZFxczMSuOiYmZmpWlYUZE0T9JmSfd2ip8t6QFJayR9Phc/T1J72nZCLj4pxdolnZuLj5d0m6R1kq6VNLRR38XMzIpp5JnKlcCkfEDSMcBU4JCIeA3wxRSfAEwDXpP2uUzSYEmDga8Dk4EJwKmpLcDFwCUR0QpsA85o4HcxM7MCGvZEfUTcKqmlU/iDwOci4unUZnOKTwXmp/iDktqBQ9O29ohYDyBpPjBV0n3AscA/pDZXARcAlzfm2/Sthy98bZ/3Oe5Tq/u8TzN7/unrMZWDgCPTZatbJL0pxUcDG3LtOlKsq/hLgccjYkeneF2SZkpaKWnlli1bSvoqZmbWWV8XlSHAcOAw4OPAAkkCVKdt9CJeV0TMjYi2iGgbObLH96GZmVkv9fULJTuAH0ZEALdLeg4YkeJjc+3GABvTcr34Y8AwSUPS2Uq+vZmZVaSvz1R+RDYWgqSDgKFkBWIRME3S3pLGA63A7cAKoDXd6TWUbDB/USpKNwMnp+POAK7v029iZmZ/pWFnKpKuAY4GRkjqAGYD84B56TbjZ4AZqUCskbQAWAvsAM6MiGfTcc4ClgCDgXkRsSZ1MQuYL+nTwF3AFY36LmZmVkwj7/46tYtN/9hF+znAnDrxxcDiOvH17LxDzMzM+gE/UW9mZqVxUTEzs9K4qJiZWWlcVMzMrDQuKmZmVhoXFTMzK42LipmZlcZFxczMSuOiYmZmpXFRMTOz0riomJlZaVxUzMysNC4qZmZWGhcVMzMrjYuKmZmVxkXFzMxK07CiImmepM1plsfO2z4mKSSNSOuSdKmkdkmrJE3MtZ0haV36zMjF3yhpddrnUklq1HcxM7NiGnmmciUwqXNQ0ljg7cDDufBksnnpW4GZwOWp7f5k0xC/mWyWx9mShqd9Lk9ta/v9VV9mZta3Gjmd8K2SWupsugT4BHB9LjYVuDrNV79c0jBJo8jmuF8aEVsBJC0FJklaBuwXEb9O8auBk4CfNubbmDWvOf94ciX9nv/dhZX0a9Xq0zEVSVOARyLink6bRgMbcusdKdZdvKNO3MzMKtSwM5XOJO0LnA8cX29znVj0It5V3zPJLpUxbty4HnM1M7Pe6cszlb8BxgP3SHoIGAPcKenlZGcaY3NtxwAbe4iPqROvKyLmRkRbRLSNHDmyhK9iZmb19FlRiYjVEXFARLRERAtZYZgYEb8DFgHT011ghwFPRMQmYAlwvKThaYD+eGBJ2rZd0mHprq/p7DpGY2ZmFWjkLcXXAL8GXiWpQ9IZ3TRfDKwH2oFvAv8EkAboLwJWpM+FtUF74IPAt9I+v8GD9GZmlWvk3V+n9rC9JbccwJldtJsHzKsTXwkcvGdZmplZmfxEvZmZlcZFxczMSuOiYmZmpXFRMTOz0riomJlZaVxUzMysNC4qZmZWGhcVMzMrjYuKmZmVpseiIumFkgal5YMkTZG0V+NTMzOzZlPkTOVW4AWSRgM3AaeRzepoZma2iyJFRRHxJ+BdwFcj4p3AhMamZWZmzahQUZH0FuA9wE9SrM8m9zIzs+ZRpKh8GDgPuC4i1kg6ELi5sWmZmVkz6vGMIyJuAW6R9MK0vh74UKMTMzOz5lPk7q+3SFoL3JfWXyfpsoZnZmZmTafI5a9/A04Afg8QEfcAb21kUmZm1pwKPfwYERs6hZ7taR9J8yRtlnRvLvYFSfdLWiXpOknDctvOk9Qu6QFJJ+Tik1KsXdK5ufh4SbdJWifpWklDi3wXMzNrnCJFZYOkw4GQNFTSx0iXwnpwJTCpU2wpcHBEHAL8F9kNAEiaAEwDXpP2uUzSYEmDga8Dk8luYz41tQW4GLgkIlqBbcAZBXIyM7MGKlJUPkA2f/xooAN4PV3MJ58XEbcCWzvFboyIHWl1OTAmLU8F5kfE0xHxINAOHJo+7RGxPiKeAeYDUyUJOBZYmPa/CjipwHcxM7MGKnL312Nkz6iU7XTg2rQ8mqzI1HSkGMCGTvE3Ay8FHs8VqHx7MzOrSJG7v67qNPYxXNK8PelU0vnADuB7tVCdZtGLeFf9zZS0UtLKLVu27G66ZmZWUJHLX4dExOO1lYjYBryhtx1KmgH8HfCeiKgVgg5gbK7ZGGBjN/HHgGGShnSK1xURcyOiLSLaRo4c2dvUzcysB0WKyiBJw2srkvanl69pkTQJmAVMSe8Tq1kETJO0t6TxQCtwO7ACaE13eg0lG8xflIrRzcDJaf8ZwPW9ycnMzMpTpDh8CfhPSbVB8VOAOT3tJOka4GhghKQOYDbZ3V57A0uzsXaWR8QH0utfFgBryS6LnRkRz6bjnAUsAQYD8yJiTepiFjBf0qeBu4ArCnwXMzNroCID9VdLugM4hmws410RsbbAfqfWCXf5D39EzKFOsYqIxcDiOvH1ZHeHmZlZP1H0Mtb9ZM+CDAGQNC4iHm5YVmZm1pR6LCqSzia7dPUo2ZP0IrvT6pDGpmZmZs2myJnKOcCrIuL3jU7GzMyaW6HXtABPNDoRMzNrfkXOVNYDyyT9BHi6FoyILzcsKzMza0pFisrD6TM0fczMzOoqckvxvwJIemFE/LHxKZmZWbPyzI9mZlYaz/xoZmaladjMj2ZmNvAUGajfZeZH4EMUm/nRzMwGmIbN/GhmZgNPt2cqaY7490ZEI2Z+NDOz55luz1TS6+en9lEuZmbW5IqMqfxK0tfI5pP/y3MqEXFnw7IyM7OmVKSoHJ5+XpiLBXBs+emYmVkz62lMZRBweUQs6KN8zMysifU0pvIccFZvDixpnqTNku7NxfaXtFTSuvRzeIpL0qWS2iWtkjQxt8+M1H6dpBm5+BslrU77XKo0P7GZmVWnyC3FSyV9TNLYVBT2l7R/gf2uBCZ1ip0L3BQRrcBNaR1gMtCaPjOByyErQmQThL2ZbOrg2bVClNrMzO3XuS8zM+tjRcZUTk8/88+mBHBgdztFxK2SWjqFpwJHp+WrgGXArBS/OiICWC5pmKRRqe3SiNgKIGkpMEnSMmC/iPh1il8NnAT8tMD3MTOzBinyluLxJfb3sojYlI67SdIBKT6abDKwmo4U6y7eUSdel6SZZGc1jBs3bg+/gpmZdaXIHPXT68Uj4uoS86g3HhK9iNcVEXOBuQBtbW1dtjMzsz1T5PLXm3LLLwCOA+4EelNUHpU0Kp2ljAI2p3gHMDbXbgywMcWP7hRfluJj6rQ3M7MK9ThQHxFn5z7vB95A72eAXATU7uCaAVyfi09Pd4EdBjyRLpMtAY6XNDwN0B8PLEnbtks6LN31NT13LDMzq0iRM5XO/kR2t1W3JF1DdpYxQlIH2V1cnwMWSDqDbIriU1LzxcCJQHs6/mkAEbFV0kXAitTuwtqgPfBBsjvM9iEboPcgvZlZxYqMqdzAzvGKQcAEoMeHISPi1C42HVenbdDFm48jYh4wr058JXBwT3mYmVnfKXKm8sXc8g7gtxHR0VVjMzMbuIoUlYeBTRHxFICkfSS1RMRDDc3MzMyaTpEn6r8PPJdbfzbFzMzMdlGkqAyJiGdqK2m5t3d/mZnZ81iRorJF0pTaiqSpwGONS8nMzJpVkTGVDwDfSxN1QfbgYd2n7M3MbGAr8u6v3wCHSXoRoIjY3vi0zMysGfV4+UvSZyQNi4g/RMT29HT7p/siOTMzay5FxlQmR8TjtZWI2Eb29LuZmdkuihSVwZL2rq1I2gfYu5v2ZmY2QBUZqP8ucJOkb5O9ruV0sgm2zMzMdlFkoP7zklYBb0uhiyJiSWPTMjOzZlT0LcV3AXuRnanc1bh0zMysmRW5++vdwO3AycC7gdskndzoxMzMrPkUOVM5H3hTRGwGkDQS+DmwsJGJmZn1tQsuuGBA9dsIRe7+GlQrKMnvC+5nZmYDTJHi8DNJSyS9T9L7gJ+QzdTYa5I+ImmNpHslXSPpBZLGS7pN0jpJ10oamtrundbb0/aW3HHOS/EHJJ2wJzmZmdmeKzJH/ceBfwcOAV4HzI2IWb3tUNJo4ENAW0QcDAwGpgEXA5dERCuwDTgj7XIGsC0i/ha4JLVD0oS032uAScBlkgb3Ni8zM9tzhS5jRcQPI+L/RsRHIuK6EvodAuwjaQiwL7AJOJad4zRXASel5ansfC5mIXCcJKX4/Ih4OiIeJJvf/tAScjMzs17q87GRiHiEbIrih8mKyRPAHcDjEbEjNesARqfl0cCGtO+O1P6l+XidfczMrAJFn1MpjaThZGcZ44HHyWaRnFynadR26WJbV/F6fc4EZgKMGzduNzM2gCO+ekQl/f7q7F9V0q+Z9U6XZyqSbko/Ly65z7cBD0bEloj4M/BD4HBgWLocBjAG2JiWO4CxKZchwEuArfl4nX12ERFzI6ItItpGjhxZ8tcxM7Oa7i5/jZJ0FDBF0hskTcx/9qDPh8nmZ9k3jY0cB6wFbiZ7wBJgBnB9Wl6U1knbfxERkeLT0t1h44FWsoc0zcysIt1d/voUcC7ZGcCXO20LsoH13RYRt0laCNwJ7CB77ctcsluV56e5Wu4Crki7XAF8R1I72RnKtHScNZIWkBWkHcCZEfFsb3IyM7NydFlUImIhsFDSv0TERWV2GhGzgdmdwuupc/dWRDwFnNLFceYAc8rMzczMeq/IW4ovkjQFeGsKLYuIHzc2LTMza0ZFXij5WeAcsstMa4FzUszMzGwXRW4pfgfw+oh4DkDSVWRjHuc1MjEzM2s+RR9+HJZbfkkjEjEzs+ZX5Ezls8Bdkm4me+DwrfgsxczM6igyUH+NpGXAm8iKyqyI+F2jEzMzs+ZT6DUtEbGJ7GFDMzOzLnmyLTMzK42LipmZlabboiJpkKR7+yoZMzNrbt0WlfRsyj2S/L54MzPrUZGB+lHAGkm3A3+sBSNiSsOyMjOzplSkqPxrw7MwM7PnhSLPqdwi6ZVAa0T8XNK+wODGp2ZmZs2myAsl3w8sBP49hUYDP2pkUmZm1pyK3FJ8JnAE8CRARKwDDmhkUmZm1pyKFJWnI+KZ2kqaJz4al5KZmTWrIkXlFkn/DOwj6e3A94Eb9qRTScMkLZR0v6T7JL1F0v6Slkpal34OT20l6VJJ7ZJWSZqYO86M1H6dpBld92hmZn2hSFE5F9gCrAb+D7AY+OQe9vsV4GcR8T+A1wH3pX5uiohW4Ka0DjAZaE2fmcDlAJL2J5uS+M1k0xDPrhUiMzOrRpG7v55LE3PdRnbZ64GI6PXlL0n7kb0+/33p+M8Az0iaChydml0FLANmAVOBq1Ofy9NZzqjUdmlEbE3HXQpMAq7pbW5mZrZnitz99Q7gN8ClwNeAdkmT96DPA8nOfL4t6S5J35L0QuBl6W3Itbci124GGA1syO3fkWJdxc3MrCJFLn99CTgmIo6OiKOAY4BL9qDPIcBE4PKIeAPZU/rndtNedWLRTfyvDyDNlLRS0sotW7bsbr5mZlZQkaKyOSLac+vrgc170GcH0BERt6X1hWRF5tF0WYv0c3Ou/djc/mOAjd3E/0pEzI2ItohoGzly5B6kbmZm3emyqEh6l6R3kb33a7Gk96U7rG4AVvS2wzRr5AZJr0qh44C1ZJOA1e7gmgFcn5YXAdPTXWCHAU+ky2NLgOMlDU8D9MenmJmZVaS7gfq/zy0/ChyVlrcAe3qX1dnA9yQNJTvzOY2swC2QdAbwMHBKarsYOBFoB/6U2hIRWyVdxM4Cd2Ft0N7MzKrRZVGJiNMa1WlE3A201dl0XJ22QfZUf73jzAPmlZudmZn1Vo+3FEsaT3Zm0ZJv71ffm5lZZ0Veff8j4AqysZTnGpuOmZk1syJF5amIuLThmZiZWdMrUlS+Imk2cCPwdC0YEXc2LCszM2tKRYrKa4H3Asey8/JXpHUzM7O/KFJU3gkcmH/9vZmZWT1Fnqi/BxjW6ETMzKz5FTlTeRlwv6QV7Dqm4luKzcxsF0WKyuyGZ2FmZnUt+P6hlfT77lNu79V+ReZTuaVXRzYzswGnyBP129n5SvmhwF7AHyNiv0YmZmZmzafImcqL8+uSTiKbvtfMzGwXRe7+2kVE/Ag/o2JmZnUUufz1rtzqILK3C/d6jnozM3v+KnL3V35elR3AQ8DUhmRjZmZNrciYSsPmVTEzs+eXLouKpE91s19ExEUNyMfMzJpYdwP1f6zzATgDmLWnHUsaLOkuST9O6+Ml3SZpnaRr01TDSNo7rben7S25Y5yX4g9IOmFPczIzsz3TZVGJiC/VPsBcYB+y+eHnAweW0Pc5wH259YuBSyKiFdhGVrxIP7dFxN8Cl6R2SJoATANeA0wCLpM0uIS8zMysl7q9pVjS/pI+Dawiu1Q2MSJmRcTmPelU0hjgHcC30rrIblNemJpcBZyUlqemddL241L7qcD8iHg6Ih4E2vHzM2ZmleqyqEj6ArAC2A68NiIuiIhtJfX7b8An2Dk/y0uBxyNiR1rvAEan5dHABoC0/YnU/i/xOvuYmVkFujtT+SjwCuCTwEZJT6bPdklP9rZDSX8HbI6IO/LhOk2jh23d7dO5z5mSVkpauWXLlt3K18zMiuvy7q+I2O2n7Qs6Apgi6UTgBcB+ZGcuwyQNSWcjY4CNqX0HMBbokDQEeAmwNRevye+zi4iYSzYuRFtbmx/cNDNrkEYVji5FxHkRMSYiWsgG2n8REe8BbgZOTs1mANen5UVpnbT9FxERKT4t3R02HmgFeveuZjMzK0WRJ+r7yixgfrox4C7gihS/AviOpHayM5RpABGxRtICYC3Zk/5nRsSzfZ+2mZnVVFpUImIZsCwtr6fO3VsR8RRwShf7zwHmNC5DMzPbHX1++cvMzJ6/XFTMzKw0LipmZlYaFxUzMyuNi4qZmZXGRcXMzErjomJmZqVxUTEzs9K4qJiZWWn602tazGwAuW/OL/q8z1eff2yf9znQ+EzFzMxK4zMVa1q3vPWoSvo96tZbutz2tY/e0IeZ7HTWl/6+kn7NOvOZipmZlcZFxczMSuOiYmZmpXFRMTOz0vR5UZE0VtLNku6TtEbSOSm+v6Slktaln8NTXJIuldQuaZWkibljzUjt10ma0VWfZmbWN6o4U9kBfDQiXg0cBpwpaQJwLnBTRLQCN6V1gMlk88+3AjOByyErQsBs4M1kM0bOrhUiMzOrRp8XlYjYFBF3puXtwH3AaGAqcFVqdhVwUlqeClwdmeXAMEmjgBOApRGxNSK2AUuBSX34VczMrJNKx1QktQBvAG4DXhYRmyArPMABqdloYENut44U6ypuZmYVqayoSHoR8APgwxHxZHdN68Sim3i9vmZKWilp5ZYtW3Y/WTMzK6SSJ+ol7UVWUL4XET9M4UcljYqITeny1uYU7wDG5nYfA2xM8aM7xZfV6y8i5gJzAdra2v5SeN748av3+Lv0xh1fmF5Jv2ZmjVbF3V8CrgDui4gv5zYtAmp3cM0Ars/Fp6e7wA4DnkiXx5YAx0sangboj08xMzOrSBVnKkcA7wVWS7o7xf4Z+BywQNIZwMPAKWnbYuBEoB34E3AaQERslXQRsCK1uzAitvbNVzAzs3r6vKhExH9QfzwE4Lg67QM4s4tjzQPmlZedmZntCT9Rb2ZmpXFRMTOz0riomJlZaVxUzMysNC4qZmZWGhcVMzMrjYuKmZmVxkXFzMxK46JiZmalcVExM7PSuKiYmVlpXFTMzKw0LipmZlYaFxUzMyuNi4qZmZXGRcXMzErjomJmZqVp+qIiaZKkByS1Szq36nzMzAaypi4qkgYDXwcmAxOAUyVNqDYrM7OBq6mLCnAo0B4R6yPiGWA+MLXinMzMBqxmLyqjgQ259Y4UMzOzCigiqs6h1ySdApwQEf87rb8XODQizu7UbiYwM62+CnighO5HAI+VcJyy9ce8nFMxzqm4/pjX8z2nV0bEyJ4aDSmps6p0AGNz62OAjZ0bRcRcYG6ZHUtaGRFtZR6zDP0xL+dUjHMqrj/m5ZwyzX75awXQKmm8pKHANGBRxTmZmQ1YTX2mEhE7JJ0FLAEGA/MiYk3FaZmZDVhNXVQAImIxsLiCrku9nFai/piXcyrGORXXH/NyTjT5QL2ZmfUvzT6mYmZm/YiLipmZlWZAFhVJL5c0X9JvJK2VtFjSQZLubXC/p0haI+k5SW2dtlWV0xck3S9plaTrJA3rBzldlPK5W9KNkl7RaXsleeX6/5ikkDSi6pwkXSDpkfRndbekE6vOKfV9dnon3xpJn686J0nX5v6MHpJ0dz/I6fWSlqecVko6tNP2qvJ6naRfS1ot6QZJ++3WASJiQH0AAb8GPpCLvR44Eri3wX2/muzhy2VAWz/J6XhgSFq+GLi4H+S0X275Q8A3+sOfVeprLNndhr8FRlSdE3AB8LE68SpzOgb4ObB3Wj+g6pw65fcl4FNV5wTcCExOyycCy/rJf78VwFFp+XTgot3ZfyCeqRwD/DkivlELRMTd5F73IqlF0i8l3Zk+h6f4KEm3pt8s7pV0pKTBkq5M66slfaSrjiPivoio9zR/lTndGBE70upysgdIq87pydzqC4H83SSV5ZVcAnyin+VUT5U5fRD4XEQ8nfrd3A9yqh1fwLuBa/pBTgHUzgJewq4PbleZ16uAW9PyUuB/ddP2rzT9LcW9cDBwRw9tNgNvj4inJLWS/QVsA/4BWBIRc5S9IXlfst8eRkfEwQDKXT5qwpxOB67tDzlJmgNMB54g+x+sprK8JE0BHomIe7J/m6rPKTlL0nRgJfDRiNhWcU4HAUem/4ZPkZ1Jrag4p5ojgUcjYl1arzKnDwNLJH2RbCji8Ny2KvO6F5gCXA+cwq5vLenRQDxTKWIv4JuSVgPfJ3utPmSnhadJugB4bURsB9YDB0r6qqRJwJP1Dtjfc5J0PrAD+F5/yCkizo+IsSmfs3Yjp4bkJWlf4HzgU7uZS8NySi4H/obsH41NZJd2qs5pCDAcOAz4OLBAnapwBTnVnMrOs5SiGpXTB4GPpL/nHwGu6Cd5nQ6cKekO4MXAM7uVVSOvzfXHD3AccGudeAvpWiXZderabw9DgB25dq8A3g+sBqan2IvIThFvIHuqv6cclrHrmEqlOQEzyK7f7ttfcsod55XkriFXlRfwWrLfDB9Knx3Aw8DL+9GfVb6/ynICfgYcnVv/DTCy6j+ndLxHgTFV/31K7Z5g57OCAp7sD3l16u8g4PYibWufgXim8gtgb0nvrwUkvYnsH6+alwCbIuI54L1kr4BB0iuBzRHxTbLfKiYquwNoUET8APgXYGIz5ZR+a5kFTImIP/WTnFpzq1OA+6vOKyJWR8QBEdESES1kLzOdGBG/q/jPalRu9Z1kly4q+3NKfgQcm451EDCU7E25Vf+/9zbg/ojoyMWqzGkjcFRaPhZYl9tW5d+pA9LPQcAngW901bau3alAz5cPWRVfQPYb1BrgJ0ArO38DaAVWkQ1cfxb4Q4rPIPuf9i7gl8B44HXAncDd6TO5m37fSfaP0dNkvzEt6Qc5tZMN/tXafqMf5PSDtP8qst+qRveH/36dcniIdPdXxX9W3yH7bXQV2ctUR/WDnIYC303HuBM4tuqc0jGuJHc3VdU5Af+TbNzkHuA24I39JK9zgP9Kn8+RzqaKfvyaFjMzK81AvPxlZmYNMhBvKW44SV8HjugU/kpEfLuKfMA57Y7+mJdzKsY5FdeovHz5y8zMSuPLX2ZmVhoXFTMzK42LilmJJA2T9E990M/RSu96MutPXFTMyjUMKFxUlOnN/4dHs+u7osz6BQ/Um5VI0nxgKvAAcDNwCNl7sPYCPhkR10tqAX6atr8FOInsae9ZZE9ZrwOejoizJI0ke6J5XOriw8AjZA+8PQtsAc6OiF/2xfcz64mLilmJUsH4cUQcLGkI2fvUnkyvyVhO9hT0K8le8Hd4RCxXNgnZf5K9OmM72Ss67klF5f8Bl0XEf0gaR/YWhlenlwX+ISK+2Nff0aw7fk7FrHEEfEbSW4HngNHAy9K230bE8rR8KHBLRGwFkPR9shf5QXYGMyH3kt/9JL24L5I36w0XFbPGeQ/Z23nfGBF/lvQQ8IK07Y+5dt29Fn4Q8JaI+P/5YPE3yZv1LQ/Um5VrO9kcFJC9RXZzKijHsOsbZvNuB46SNDxdMsvPtHcjuflkJL2+Tj9m/YaLilmJIuL3wK8k3Us2cVabpJVkZy33d7HPI8BnyN5U+3NgLdlcGwAfSsdYJWkt8IEUvwF4p7IpY49s2Bcy200eqDfrByS9KCL+kM5UriObROm6qvMy210+UzHrHy6QdDfZPBgPkk10ZdZ0fKZiZmal8ZmKmZmVxkXFzMxK46JiZmalcVExM7PSuKiYmVlpXFTMzKw0/w2DGEFX2FTwMgAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "sns.countplot(train.target);\n",
    "plt.xlabel('target');\n",
    "plt.ylabel('Number of occurrences');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAELCAYAAAARNxsIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAG2VJREFUeJzt3X+UVeV97/H3B9BIjApEdBF+FGxJUmv8OVESe42RBtHkivHG1rQ3UMu6tFnGmCZpxbQ33EjTahtNYpuYsiIREiOhVCNNUEKImNtcfzCAgoAuJmhhCpEx+AO1atDv/WM/JxyGMzN7hr1nz5HPa62zztnf8+x9vocF58vez7OfRxGBmZlZEQZVnYCZmb1xuKiYmVlhXFTMzKwwLipmZlYYFxUzMyuMi4qZmRXGRcXMzArjomJmZoVxUTEzs8IMqTqB/nbsscfG+PHjq07DzKxprFmz5umIGJmn7SFXVMaPH09ra2vVaZiZNQ1J/5G3rS9/mZlZYVxUzMysMC4qZmZWGBcVMzMrjIuKmZkVxkXFzMwK46JiZmaFKbWoSBomaYmkxyRtlvQeSSMkrZC0JT0PT20l6SZJbZLWSzq97jgzUvstkmbUxc+QtCHtc5Mklfl9zMyse2WfqXwVuCci3gmcAmwGZgMrI2IisDJtA1wATEyPWcDNAJJGAHOAs4AzgTm1QpTazKrbb2rJ38fMzLpR2h31ko4GzgH+GCAiXgVelTQNODc1WwCsAq4GpgELIyKAB9JZzqjUdkVE7E7HXQFMlbQKODoi7k/xhcDFwN15czzjLxYe1HfsqzX/ML2SzzUzK1uZZyonAB3AtyStk/RNSUcCx0fEToD0fFxqPxrYXrd/e4p1F29vEDczs4qUWVSGAKcDN0fEacCL7LvU1Uij/pDoQ/zAA0uzJLVKau3o6Og+azMz67Myi0o70B4RD6btJWRF5ql0WYv0vKuu/di6/ccAO3qIj2kQP0BEzIuIlohoGTky10SbZmbWB6UVlYj4BbBd0jtSaDKwCVgK1EZwzQDuSq+XAtPTKLBJwHPp8thyYIqk4amDfgqwPL23R9KkNOpret2xzMysAmVPfX8lcJukw4GtwOVkhWyxpJnANuDS1HYZcCHQBryU2hIRuyXNBVandtfWOu2BjwO3AkPJOuhzd9KbmVnxSi0qEfEw0NLgrckN2gZwRRfHmQ/MbxBvBU46yDTNzKwgvqPezMwK46JiZmaFcVExM7PCuKiYmVlhXFTMzKwwLipmZlYYFxUzMyuMi4qZmRXGRcXMzArjomJmZoVxUTEzs8K4qJiZWWFcVMzMrDAuKmZmVhgXFTMzK4yLipmZFcZFxczMCuOiYmZmhXFRMTOzwriomJlZYVxUzMysMC4qZmZWGBcVMzMrjIuKmZkVxkXFzMwK46JiZmaFKbWoSHpS0gZJD0tqTbERklZI2pKeh6e4JN0kqU3Sekmn1x1nRmq/RdKMuvgZ6fhtaV+V+X3MzKx7/XGm8v6IODUiWtL2bGBlREwEVqZtgAuAiekxC7gZsiIEzAHOAs4E5tQKUWozq26/qeV/HTMz60oVl7+mAQvS6wXAxXXxhZF5ABgmaRRwPrAiInZHxDPACmBqeu/oiLg/IgJYWHcsMzOrQNlFJYAfSVojaVaKHR8ROwHS83EpPhrYXrdve4p1F29vED+ApFmSWiW1dnR0HORXMjOzrgwp+fhnR8QOSccBKyQ91k3bRv0h0Yf4gcGIecA8gJaWloZtzMzs4JV6phIRO9LzLuBOsj6Rp9KlK9LzrtS8HRhbt/sYYEcP8TEN4mZmVpHSioqkIyUdVXsNTAEeBZYCtRFcM4C70uulwPQ0CmwS8Fy6PLYcmCJpeOqgnwIsT+/tkTQpjfqaXncsMzOrQJmXv44H7kyjfIcA342IeyStBhZLmglsAy5N7ZcBFwJtwEvA5QARsVvSXGB1andtROxOrz8O3AoMBe5ODzMzq0iPRSWdZfxXRLwu6e3AO4G7I+JX3e0XEVuBUxrEfwlMbhAP4IoujjUfmN8g3gqc1NN3MDOz/pHn8tdPgSMkjSa7r+RysrMDMzOz/eQpKoqIl4BLgH+MiA8DJ5ablpmZNaNcRUXSe4A/An6YYmUPRTYzsyaUp6h8CrgGuDMiNko6Abi33LTMzKwZ9XjGERH3AfelDvtaB/wny07MzMyaT49nKpLeI2kTsDltnyLp66VnZmZmTSfP5a+vkE3q+EuAiHgEOKfMpMzMrDnluqM+IrZ3Cr1WQi5mZtbk8ozi2i7pvUBIOpysP2VzuWmZmVkzynOm8mdkd7rXppo/lS7ufDczs0NbntFfT5Pdo2JmZtatPKO/FkgaVrc9XNIB83CZmZnlufx1ckQ8W9tIS/qeVl5KZmbWrPIUlUFpHRMAJI3A07SYmVkDeYrDDcD/k7QkbV8KfLG8lMzMrFnl6ahfKGkN8H6ydeEviYhNpWdmZmZNJ+9lrMeAZ2rtJY2LiG2lZWVmZk0pz8qPVwJzgKfI7qQXEMDJ5aZmZmbNJs+ZylXAO9IywGZmZl3KM/prO/Bc2YmYmVnzy3OmshVYJemHwCu1YETcWFpWZmbWlPIUlW3pcXh6mJmZNZRnSPEXACQdGREvlp+SmZk1K6/8aGZmhfHKj2ZmVpjSV36UNFjSOkk/SNsTJD0oaYuk76WFv5D0prTdlt4fX3eMa1L8cUnn18WnplibpNl5czIzs3LkGlJcv/KjpM/Su5Ufr+rU/nrgyxExkewu/ZkpPhN4JiJ+C/hyaoekE4HLgN8BpgJfT4VqMPA14ALgROCjqa2ZmVWk1JUfJY0BPgh8M20LOA+oTU65ALg4vZ6WtknvT07tpwGLIuKViHgCaAPOTI+2iNgaEa8Ci1JbMzOrSLejv9LZwMcioq8rP34F+EvgqLT9VuDZiNibttvJihXpeTtAROyV9FxqPxp4oO6Y9fts7xQ/q4vvMQuYBTBu3Lg+fhUzM+tJt2cqEfEaffzfv6QPAbsiYk19uNHH9PBeb+MHBiPmRURLRLSMHDmym6zNzOxg5Ln58WeS/gn4HvDr+1QiYm0P+50NXCTpQuAI4GiyM5dhkoaks5UxwI7Uvh0YC7RLGgIcA+yui9fU79NV3MzMKpCnT+W9ZJ3k15It2HUD8KWedoqIayJiTESMJ+to/0m6jHYv8JHUbAZwV3q9NG2T3v9JRESKX5ZGh00AJgIPAauBiWk02eHpM5bm+D5mZlaSnvpUBgE3R8TiAj/zamCRpL8B1gG3pPgtwLcltZGdoVwGEBEbJS0GNgF7gSvSZTkkfQJYDgwG5kfExgLzNDOzXuq2qETE6+mH+6CKSkSsAlal11vJRm51bvMy2VLFjfb/Ig2WMI6IZcCyg8nNzMyKk+fy1wpJn5U0VtKI2qP0zMzMrOnk6aj/k/Rcf29KACcUn46ZmTWzPLMUT+iPRMzMrPnlWaN+eqN4RCwsPh0zM2tmeS5/vbvu9RHAZGAt4KJiZmb7yXP568r6bUnHAN8uLSMzM2tauaa+7+QlshsQzczM9pOnT+Xf2Den1iCyaeaLvBnSzMzeIPL0qdRPybIX+I+IaC8pHzMza2J5iso2YGe64x1JQyWNj4gnS83MzMyaTp4+lX8BXq/bfi3FzMzM9pOnqAxJKysCkF4fXl5KZmbWrPIUlQ5JF9U2JE0Dni4vJTMza1Z5+lT+DLgtLdQF2aJZDe+yNzOzQ1uemx9/DkyS9BZAEbGn/LTMzKwZ9Xj5S9LfShoWES9ExB5Jw9MCW2ZmZvvJ06dyQUQ8W9uIiGeAC8tLyczMmlWeojJY0ptqG5KGAm/qpr2ZmR2i8nTUfwdYKelbZNO1/AmwoNSszMysKeXpqP97SeuB30uhuRGxvNy0zMysGeU5UwFYBxxGdqayrrx0zMysmeUZ/fX7wEPAR4DfBx6U9JGyEzMzs+aT50zlr4B3R8QuAEkjgR8DS8pMzMzMmk+e0V+DagUl+WXO/czM7BCT50zlHknLgdvT9h8Ay8pLyczMmlWPZxwR8RfAPwMnA6cA8yLi6p72k3SEpIckPSJpo6QvpPgESQ9K2iLpe5IOT/E3pe229P74umNdk+KPSzq/Lj41xdokze7tlzczs2LlGv0VEXcAd/Ty2K8A50XEC5IOA/5d0t3Ap4EvR8QiSd8AZgI3p+dnIuK3JF0GXA/8gaQTgcuA3wHeBvxY0tvTZ3wN+ADZJJerJS2NiE29zNPMzApSWt9IZF5Im4exb0jyeezr5F8AXJxeT2PfTZVLgMmSlOKLIuKViHgCaAPOTI+2iNia1nhZlNqamVlFSu1wlzRY0sPALmAF8HPg2YjYm5q0A6PT69HAdoD0/nPAW+vjnfbpKm5mZhXpsqhIWpmer+/rwSPitYg4FRhDdmbx242a1T6yi/d6Gz+ApFmSWiW1dnR09Jy4mZn1SXd9KqMkvQ+4SNIiOv2IR8TavB8SEc9KWgVMAoZJGpLORsYAO1KzdmAs0C5pCHAMsLsuXlO/T1fxzp8/D5gH0NLS0rDwmJnZweuuqHwemE32Y31jp/dqfSNdSjdJ/ioVlKFkc4ddD9xLdnf+ImAGcFfaZWnavj+9/5OICElLge9KupGso34i2R3+AiZKmgD8J1ln/h/m+dJmZlaOLotKRCwBlkj63xExtw/HHgUskDSY7DLb4oj4gaRNwKK00Nc64JbU/hbg25LayM5QLkt5bJS0GNgE7AWuiIjXACR9AlgODAbmR8TGPuRpZmYFyTNL8VxJFwHnpNCqiPhBjv3WA6c1iG8l61/pHH8ZuLSLY30R+GKD+DJ8I6aZ2YCRZ0LJvwOuIjtT2ARclWJmZmb7yXPz4weBUyPidQBJC8guW11TZmJmZtZ88t6nMqzu9TFlJGJmZs0vz5nK3wHrJN1LNuLqHHyWYmZmDeTpqL893WPybrKicnVE/KLsxMzMrPnknVByJ9l9JGZmZl3yYltmZlYYFxUzMytMt0VF0iBJj/ZXMmZm1ty6LSrp3pRHJI3rp3zMzKyJ5emoHwVslPQQ8GItGBEXlZaVmZk1pTxF5QulZ2FmZm8Iee5TuU/SbwATI+LHkt5MNiuwmZnZfvJMKPm/yNaM/+cUGg18v8ykzMysOeUZUnwFcDbwPEBEbAGOKzMpMzNrTnmKyisR8WptIy316yV5zczsAHmKyn2SPgcMlfQB4F+Afys3LTMza0Z5ispsoAPYAPwp2UqLf11mUmZm1pzyjP56PS3M9SDZZa/HI8KXv8zM7AA9FhVJHwS+AfycbOr7CZL+NCLuLjs5MzNrLnlufrwBeH9EtAFI+k3gh4CLipmZ7SdPn8quWkFJtgK7SsrHzMyaWJdnKpIuSS83SloGLCbrU7kUWN0PuZmZWZPp7vLXf697/RTwvvS6AxheWkZmZta0uiwqEXF5fyZiZmbNL8/cXxMk3SjpDklLa48c+42VdK+kzZI2SroqxUdIWiFpS3oenuKSdJOkNknrJZ1ed6wZqf0WSTPq4mdI2pD2uUmS+vbHYGZmRcgz+uv7wC1kd9G/3otj7wU+ExFrJR0FrJG0AvhjYGVEXCdpNtnNlVcDFwAT0+Ms4GbgLEkjgDlAC1mfzhpJSyPimdRmFvAA2U2ZU/GoNDOzyuQpKi9HxE29PXBE7AR2ptd7JG0mm+F4GnBuarYAWEVWVKYBC9ONlQ9IGiZpVGq7IiJ2A6TCNFXSKuDoiLg/xRcCF+OiYmZWmTxF5auS5gA/Al6pBSNibd4PkTQeOI3srvzjU8EhInZKqs14PBrYXrdbe4p1F29vEDczs4rkKSrvAj4GnMe+y1+Rtnsk6S3AvwKfiojnu+n2aPRG9CHeKIdZZJfJGDduXE8pm5lZH+UpKh8GTqif/j4vSYeRFZTbIuKOFH5K0qh0ljKKfTdStgNj63YfA+xI8XM7xVel+JgG7Q8QEfOAeQAtLS2et8zMrCR57qh/BBjW2wOnkVi3AJsj4sa6t5YCtRFcM4C76uLT0yiwScBz6TLZcmCKpOFppNgUYHl6b4+kSemzptcdy8zMKpDnTOV44DFJq9m/T+WiHvY7m+yy2QZJD6fY54DrgMWSZgLbyO7Qh2z01oVAG/AScHn6nN2S5rLvLv5ra532wMeBW4GhZB307qQ3M6tQnqIypy8Hjoh/p3G/B8DkBu2DbOniRseaD8xvEG8FTupLfmZmVrw866nc1x+JmJlZ88uznsoe9o2qOhw4DHgxIo4uMzEzM2s+ec5UjqrflnQxcGZpGZmZWdPKM/prPxHxfXLeo2JmZoeWPJe/LqnbHMS+ObjMzMz2k2f0V/26KnuBJ8nm6TIzM9tPnj4Vr6tiZma5dLec8Oe72S8iYm4J+ZiZWRPr7kzlxQaxI4GZwFsBFxUzM9tPd8sJ31B7nRbZuops6pRFwA1d7WdmZoeubvtU0qqLnwb+iGxBrdPTiotmZmYH6K5P5R+AS8imjH9XRLzQb1mZmVlT6u7mx88AbwP+Gtgh6fn02CPp+f5Jz8zMmkl3fSq9vtvezMwObS4cZmZWGBcVMzMrjIuKmZkVxkXFzMwK46JiZmaFcVExM7PCuKiYmVlhXFTMzKwwLipmZlYYFxUzMyuMi4qZmRWmtKIiab6kXZIerYuNkLRC0pb0PDzFJekmSW2S1ks6vW6fGan9Fkkz6uJnSNqQ9rlJksr6LmZmlk+ZZyq3AlM7xWYDKyNiIrAybQNcAExMj1nAzfDr9VzmAGcBZwJzaoUotZlVt1/nzzIzs35WWlGJiJ8CuzuFp5Et9kV6vrguvjAyDwDDJI0CzgdWRMTutDjYCmBqeu/oiLg/IgJYWHcsMzOrSH/3qRwfETsB0vNxKT4a2F7Xrj3Fuou3N4ibmVmFBkpHfaP+kOhDvPHBpVmSWiW1dnR09DFFMzPrSX8XlafSpSvS864UbwfG1rUbA+zoIT6mQbyhiJgXES0R0TJy5MiD/hJmZtZYfxeVpUBtBNcM4K66+PQ0CmwS8Fy6PLYcmCJpeOqgnwIsT+/tkTQpjfqaXncsMzOrSJfLCR8sSbcD5wLHSmonG8V1HbBY0kxgG3Bpar4MuBBoA14CLgeIiN2S5gKrU7trI6LW+f9xshFmQ4G708PMzCpUWlGJiI928dbkBm0DuKKL48wH5jeItwInHUyOZmZWrIHSUW9mZm8ALipmZlYYFxUzMyuMi4qZmRXGRcXMzArjomJmZoVxUTEzs8K4qJiZWWFcVMzMrDAuKmZmVhgXFTMzK4yLipmZFcZFxczMCuOiYmZmhXFRMTOzwriomJlZYVxUzMysMC4qZmZWGBcVMzMrjIuKmZkVxkXFzMwK46JiZmaFcVExM7PCuKiYmVlhXFTMzKwwTV9UJE2V9LikNkmzq87HzOxQNqTqBA6GpMHA14APAO3AaklLI2JTtZn13bZr31XJ5477/IZKPtfM3lia/UzlTKAtIrZGxKvAImBaxTmZmR2ymvpMBRgNbK/bbgfOqiiXN7Sz//Hsfv/Mn135s37/TDM7OM1eVNQgFgc0kmYBs9LmC5IeL+CzjwWe7suO+tKMAj6+oT7nxJxGf5SF6VNe+uTAy6lkzimfgZgTDMy8isrpN/I2bPai0g6MrdseA+zo3Cgi5gHzivxgSa0R0VLkMQ/WQMwJBmZezikf55TfQMyripyavU9lNTBR0gRJhwOXAUsrzsnM7JDV1GcqEbFX0ieA5cBgYH5EbKw4LTOzQ1ZTFxWAiFgGLKvgowu9nFaQgZgTDMy8nFM+zim/gZhXv+ekiAP6tc3MzPqk2ftUzMxsAHFR6YOBNjWMpPmSdkl6tOpcaiSNlXSvpM2SNkq6agDkdISkhyQ9knL6QtU51UgaLGmdpB9UnUuNpCclbZD0sKTWqvMBkDRM0hJJj6W/W+8ZADn9efr79Kik2yUdUVEeB/wOSBohaYWkLel5eNl5uKj0Ut3UMBcAJwIflXRitVlxKzC14hw62wt8JiJ+G5gEXDEA/pxeAc6LiFOAU4GpkiZVnFPNVcDmqpNo4P0RceoAGir7VeCeiHgncAoV/5lJGg18EmiJiJPIBgxdVlE6t3Lg78BsYGVETARWpu1Suaj03oCbGiYifgrsrjKHziJiZ0SsTa/3kP3jH11xThERL6TNw9Kj8k5FSWOADwLfrDqXgUzS0cA5wC0AEfFqRDxbbVZANuBpqKQhwJtpcK9cf+jid2AasCC9XgBcXHYeLiq912hqmEp/LAc6SeOB04AHq83k15eZHgZ2ASsiovKcgK8Afwm8XnUinQTwI0lr0qwUVTsB6AC+lS4VflPSkVUmFBH/CXwJ2AbsBJ6LiB9VmVMnx0fETsj+owccV/YHuqj0Xq6pYSwj6S3AvwKfiojnq84nIl6LiFPJZl84U9JJVeYj6UPArohYU2UeXTg7Ik4nu9R7haRzKs5nCHA6cHNEnAa8SD9czulO6qOYBkwA3gYcKel/VplT1VxUei/X1DAGkg4jKyi3RcQdVedTL102WUX1fVFnAxdJepLsUup5kr5TbUqZiNiRnncBd5Jd+q1SO9Bed3a5hKzIVOn3gCcioiMifgXcAby34pzqPSVpFEB63lX2B7qo9J6nhslBksiufW+OiBurzgdA0khJw9LroWQ/CI9VmVNEXBMRYyJiPNnfpZ9EROX/05V0pKSjaq+BKUClowsj4hfAdknvSKHJQNVrJ20DJkl6c/o7P5mBNeBiKVCbwXYGcFfZH9j0d9T3t4E4NYyk24FzgWMltQNzIuKWKnMi+x/4x4ANqQ8D4HNpBoSqjAIWpBF8g4DFETFghvAOMMcDd2a/kwwBvhsR91SbEgBXArel/9BtBS6vMpmIeFDSEmAt2YjHdVR0Z32j3wHgOmCxpJlkBfDS0vPwHfVmZlYUX/4yM7PCuKiYmVlhXFTMzKwwLipmZlYYFxUzMyuMi4qZmRXGRcWsYJI+maZlv62X+42X9Ic52p0s6f403fqGqqZaN2vE96mYFUzSY8AFEfFEL/c7F/hsRHyomzZDyG60+1hEPCLprcCzEfHaweRsVhSfqZgVSNI3yGbTXSrpr9LCSavTrLrTUpvxkv6vpLXpUZsr6jrgv6VFsf68i4+YAqyPiEcAIuKXLig2kPhMxaxgaXLIFuDTwKaI+E6ac+whsiUAAng9Il6WNBG4PSJacp6pfAo4g2wK85HAooj4+1K/kFkveO4vs/JMIZuB+LNp+whgHNms1v8k6VTgNeDtvTjmEOB3gXcDLwErJa2JiJXFpW3Wdy4qZuUR8D8i4vH9gtL/AZ4iWw53EPByL47ZDtwXEU+nYy0jm/7dRcUGBPepmJVnOXBlmhIdSael+DHAzoh4nWwm58Epvgc4KscxT05TrQ8B3kf107+b/ZqLill55gKHAeslPZq2Ab4OzJD0ANmlrxdTfD2wV9IjXXXUR8QzwI1k6/o8DKyNiB+W+B3MesUd9WZmVhifqZiZWWHcUW82AEk6H7i+U/iJiPhwFfmY5eXLX2ZmVhhf/jIzs8K4qJiZWWFcVMzMrDAuKmZmVhgXFTMzK8z/B3VwU/MLbolAAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "sns.countplot(train.feat_6);\n",
    "plt.xlabel('feat_6');\n",
    "plt.ylabel('Number of occurrences');"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "数据预处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>feat_1</th>\n",
       "      <th>feat_2</th>\n",
       "      <th>feat_3</th>\n",
       "      <th>feat_4</th>\n",
       "      <th>feat_5</th>\n",
       "      <th>feat_6</th>\n",
       "      <th>feat_7</th>\n",
       "      <th>feat_8</th>\n",
       "      <th>feat_9</th>\n",
       "      <th>feat_10</th>\n",
       "      <th>...</th>\n",
       "      <th>feat_84</th>\n",
       "      <th>feat_85</th>\n",
       "      <th>feat_86</th>\n",
       "      <th>feat_87</th>\n",
       "      <th>feat_88</th>\n",
       "      <th>feat_89</th>\n",
       "      <th>feat_90</th>\n",
       "      <th>feat_91</th>\n",
       "      <th>feat_92</th>\n",
       "      <th>feat_93</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>22</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 93 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   feat_1  feat_2  feat_3  feat_4  feat_5  feat_6  feat_7  feat_8  feat_9  \\\n",
       "0       1       0       0       0       0       0       0       0       0   \n",
       "1       0       0       0       0       0       0       0       1       0   \n",
       "2       0       0       0       0       0       0       0       1       0   \n",
       "3       1       0       0       1       6       1       5       0       0   \n",
       "4       0       0       0       0       0       0       0       0       0   \n",
       "\n",
       "   feat_10   ...     feat_84  feat_85  feat_86  feat_87  feat_88  feat_89  \\\n",
       "0        0   ...           0        1        0        0        0        0   \n",
       "1        0   ...           0        0        0        0        0        0   \n",
       "2        0   ...           0        0        0        0        0        0   \n",
       "3        1   ...          22        0        1        2        0        0   \n",
       "4        0   ...           0        1        0        0        0        0   \n",
       "\n",
       "   feat_90  feat_91  feat_92  feat_93  \n",
       "0        0        0        0        0  \n",
       "1        0        0        0        0  \n",
       "2        0        0        0        0  \n",
       "3        0        0        0        0  \n",
       "4        1        0        0        0  \n",
       "\n",
       "[5 rows x 93 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 特征X和标签y\n",
    "X = train.iloc[:,1:94]\n",
    "y = train['target']\n",
    "train_id = train['id']\n",
    "feat_names = X.columns\n",
    "X.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Program Files\\Anaconda\\lib\\site-packages\\sklearn\\preprocessing\\data.py:625: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  return self.partial_fit(X, y)\n",
      "D:\\Program Files\\Anaconda\\lib\\site-packages\\sklearn\\base.py:462: DataConversionWarning: Data with input dtype int64 were all converted to float64 by StandardScaler.\n",
      "  return self.fit(X, **fit_params).transform(X)\n"
     ]
    }
   ],
   "source": [
    "# 标准化\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "ss = StandardScaler()\n",
    "X = ss.fit_transform(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "numpy.ndarray"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 保存预处理结果\n",
    "train = pd.concat([train_id,pd.DataFrame(columns=feat_names,data=X),y],axis=1)\n",
    "train.to_csv('./data/'+'Otto_FE_train.csv',index=False,header=True)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
